

# generate figure 2

# load packages
library(tidyverse) # data manipulation
library(quanteda) # text analysis
library(scales) # better breaks for plots
library(zoo) # work with dates
library(ggpubr) # combine different plots


# set working directory
# NOTE(review): every relative path below ("data/...", "figures/...") is
# resolved against this directory, so the script only runs where this folder
# exists. Consider launching the script from the project root instead.
setwd("~/replication_files/")

# load corpus
# load() restores objects under the names they were saved with and returns
# those names. The rest of the script relies on `article_iv_corpus`, so fail
# right here with a clear message if the file does not provide it.
loaded_objects <- load("data/article_iv_corpus.Rdata")
stopifnot("article_iv_corpus" %in% loaded_objects)


# set search terms
# Single-key dictionary: every term below is counted under the key "resources".
# Multi-word entries are written as space-separated phrases.
natural_resource_dictionary <- dictionary(list(
  resources = c(
    # generic resource / extractive-sector terms
    "natural resource", "natural resources",
    "extractive industry", "extractive industries",
    # oil, gas and fuels
    "oil", "petroleum", "crude oil", "gasoline", "diesel",
    "LNG", "natural gas", "fuel", "fuels", "energy",
    "refinery", "hydrocarbon",
    # mining and minerals
    "mineral", "mining", "mine", "mines",
    "copper", "gold", "diamond", "iron", "steel", "phosphate",
    # transparency initiative acronym
    "eiti"
  )
))


##### GHANA
# Consultation date of each Ghana report. The dates are attached to the
# filtered corpus rows by position (cbind), so they are assumed to be listed in
# the same order as the Ghana rows of `article_iv_corpus` -- TODO confirm.
date_consult1 <- c("Apr 2005", "Mar 2007", "Apr 2008", "May 2009", "Mar 2011",
                   "Apr 2013", "May 2014", "July 2017", "Oct 2019")

ghana <- article_iv_corpus %>%
  filter(country == "Ghana")

# cbind() pairs rows and dates purely by position and may recycle a shorter
# vector, so stop early if the number of reports and dates disagree.
stopifnot(nrow(ghana) == length(date_consult1))

ghana <- ghana %>%
  cbind(date_consult1) %>%
  mutate(date_pub = zoo::as.yearmon(date_pub),
         date_consult1 = zoo::as.yearmon(date_consult1))

# generate corpus from the staff appraisal text of each report
ghana_corpus <- corpus(ghana, text_field = "staff_appraisal")

# pre-processing: tokenize and lower-case
ghana_tokens <- tokens(ghana_corpus) %>%
  tokens_tolower()

# run search on tokens
ghana_search <- tokens_lookup(ghana_tokens, natural_resource_dictionary,
                              valuetype = "glob", verbose = TRUE)

# create a dfm with token search
search_hits1 <- dfm(ghana_search)

# convert the resulting dfm to data frame and merge the report metadata back in
search_results1 <- quanteda::convert(search_hits1, to = "data.frame") %>%
  rename(doc_name = doc_id) %>%
  right_join(ghana, by = c("doc_name" = "doc_id")) %>%
  # Hard-coded override: the report published in May 2014 is plotted with a
  # value of 0.01 instead of its counted value.
  # NOTE(review): presumably so that this report still shows a visible sliver
  # of a bar -- confirm the intent.
  mutate(resources = ifelse(date_pub == "May 2014", 0.01, resources))

# generate plot
# No colour aesthetic is mapped and no legend is drawn, so the former
# scale_color_manual() and legend theme settings had no effect and were dropped.
plot1 <- ggplot(data = search_results1, aes(x = date_consult1, y = resources)) +
  geom_col() +
  labs(x = "Time", y = "Number of Natural Resource Terms", title = "(a) Ghana") +
  theme_classic() +
  theme(plot.title = element_text(face = "bold")) +
  # one axis break per consultation date
  scale_x_yearmon(breaks = search_results1$date_consult1) +
  scale_y_continuous(breaks = pretty_breaks()) +
  # Vertical reference lines at fixed positions on the time axis (decimal
  # years). NOTE(review): document which events the dashed and solid lines mark.
  geom_vline(xintercept = 2007.72, color = "#73D055FF", linetype = "dashed") +
  geom_vline(xintercept = 2011.72, color = "#73D055FF")



###### GUYANA
# Consultation date of each Guyana report. The dates are attached to the
# filtered corpus rows by position (cbind), so they are assumed to be listed in
# the same order as the Guyana rows of `article_iv_corpus` -- TODO confirm.
date_consult2 <- c("Jan 2010", "Nov 2010", "Oct 2013", "Mar 2016", "Mar 2017",
                   "May 2018", "June 2019")

guyana <- article_iv_corpus %>%
  filter(country == "Guyana")

# cbind() pairs rows and dates purely by position and may recycle a shorter
# vector, so stop early if the number of reports and dates disagree.
stopifnot(nrow(guyana) == length(date_consult2))

guyana <- guyana %>%
  cbind(date_consult2) %>%
  mutate(date_pub = zoo::as.yearmon(date_pub),
         date_consult2 = zoo::as.yearmon(date_consult2))

# generate corpus from the staff appraisal text of each report
guyana_corpus <- corpus(guyana, text_field = "staff_appraisal")

# pre-processing: tokenize and lower-case
guyana_tokens <- tokens(guyana_corpus) %>%
  tokens_tolower()

# run search on tokens
guyana_search <- tokens_lookup(guyana_tokens, natural_resource_dictionary,
                               valuetype = "glob", verbose = TRUE)

# create a dfm with token search
search_hits2 <- dfm(guyana_search)

# convert the resulting dfm to data frame and merge the report metadata back in
search_results2 <- quanteda::convert(search_hits2, to = "data.frame") %>%
  rename(doc_name = doc_id) %>%
  right_join(guyana, by = c("doc_name" = "doc_id")) %>%
  # Hard-coded override: the report published in Sep 2010 is plotted with a
  # value of 0.01 instead of its counted value.
  # NOTE(review): presumably so that this report still shows a visible sliver
  # of a bar -- confirm the intent.
  mutate(resources = ifelse(date_pub == "Sep 2010", 0.01, resources))

# generate plot
# No colour aesthetic is mapped and no legend is drawn, so the former
# scale_color_manual() and legend theme settings had no effect and were dropped.
plot2 <- ggplot(data = search_results2, aes(x = date_consult2, y = resources)) +
  geom_col() +
  labs(x = "Time", y = "Number of Natural Resource Terms", title = "(b) Guyana") +
  theme_classic() +
  theme(plot.title = element_text(face = "bold")) +
  # one axis break per consultation date
  scale_x_yearmon(breaks = search_results2$date_consult2) +
  scale_y_continuous(breaks = pretty_breaks()) +
  # Vertical reference lines at fixed positions on the time axis (decimal
  # years). NOTE(review): document which events the dashed and solid lines mark.
  geom_vline(xintercept = 2015.4, color = "#73D055FF", linetype = "dashed") +
  geom_vline(xintercept = 2019, color = "#73D055FF")



# COMBINE PLOTS
# Stack the two country panels vertically and export them as a single PDF.
# The export cannot open its output file when the target folder is missing,
# so create it first (a silent no-op when it already exists).
dir.create("figures", showWarnings = FALSE, recursive = TRUE)

ggarrange(plot1, plot2, nrow = 2) %>%
  ggexport(filename = "figures/fig2.pdf", width = 9, height = 10)


